Test Training Data Generation

library(terra)
library(sf)
library(tidyverse)
library(tidyterra)
library(leaflet)
library(knitr)

knitr::opts_knit$set(root.dir = "/Users/Anthony/Data and Analysis Local/NYS_Wetlands_GHG/")
knitr::opts_chunk$set(message = FALSE, cache = TRUE)

here::i_am("NWI_training_data.qmd")
library(here)

set.seed(11)

This is all data downloaded to a local folder

ny_nwi <- vect("Data/NWI/NY_shapefile_wetlands/NY_Wetlands.shp") |> terra::project("EPSG:4269")
ny_state <- vect("Data/NWI/NY_shapefile_wetlands/New York.shp") |> terra::project("EPSG:4269")
ny_hucs_1 <- vect("Data/WBD_04_HU2_GPKG/WBD_04_HU2_GPKG.gpkg", layer = "WBDHU12") |> tidyterra::filter(str_detect(states, "NY"))
ny_hucs_2 <- vect("Data/WBD_02_HU2_GPKG/WBD_02_HU2_GPKG.gpkg", layer = "WBDHU12") |> tidyterra::filter(str_detect(states, "NY"))
ny_hucs_3 <- vect("Data/WBD_05_HU2_GPKG/WBD_05_HU2_GPKG.gpkg", layer = "WBDHU12") |> tidyterra::filter(str_detect(states, "NY"))
ny_hucs <- terra::union(ny_hucs_1, ny_hucs_2) |> terra::union(y = ny_hucs_3) |> 
    dplyr::select(!ends_with("_1")) |> 
    dplyr::select(!ends_with("_2"))
writeVector(ny_hucs, "Data/NY_HUCS/NY_HUCS_08.gpkg", overwrite = TRUE)

This is for one HUC

test_huc <- ny_hucs |> dplyr::filter(str_detect(huc12, "020501020703"))
test_nwi_all <- ny_nwi |> terra::crop(y = test_huc) 
test_nwi_subset <- test_nwi_all |> 
    dplyr::filter(WETLAND_TY == "Freshwater Forested/Shrub Wetland" | WETLAND_TY == "Freshwater Emergent Wetland")

Create training data by spatSampleing the HUC and then extracting the wetland and upland points

The number of points could depend on the size of the Watershed

test_nwi_pts <- terra::spatSample(test_huc, method = "random", size = 1E4) # 1E4 = 10000
test_nwi_pts_wet <- terra::mask(test_nwi_pts, test_nwi_subset, inverse = FALSE) |> 
    terra::intersect(x = test_nwi_subset |> terra::buffer(-1)) |> # This attributes the points with NWI classes
    dplyr::mutate(MOD_CLASS = case_when(WETLAND_TY == "Freshwater Forested/Shrub Wetland" ~ "FSW",
                                        .default = "EMW"),
                  COARSE_CLASS = "WET") |>
    dplyr::select(MOD_CLASS, COARSE_CLASS)
test_nwi_pts_upl <- terra::mask(test_nwi_pts, test_nwi_all |> buffer(5), inverse = TRUE) |> 
    dplyr::mutate(MOD_CLASS = "UPL",
                  COARSE_CLASS = "UPL") |>
    dplyr::select(MOD_CLASS, COARSE_CLASS)

test_nwi_pts_all <- rbind(test_nwi_pts_upl, test_nwi_pts_wet)
glimpse(test_nwi_all)
#  A SpatVector 679 x 5
#  Geometry type: Polygons
#  Geodetic CRS: lon/lat NAD83 (EPSG:4269)
#  Extent (x / y) : ([75° 53' 47.49" W / 75° 46' 5.74" W] , [42° 23' 40.28" N / 42° 33' 7.63" N])

$ ATTRIBUTE  <chr> "L1UBH", "L1UBHh", "L1UBHh", "PEM1/SS1E", "PEM1/SS1E", "PEM…
$ WETLAND_TY <chr> "Lake", "Lake", "Lake", "Freshwater Emergent Wetland", "Fre…
$ ACRES      <dbl> 24.28460328, 112.72737950, 115.56355652, 1.46909221, 3.1166…
$ Shape_Leng <dbl> 1325.91388, 3933.78724, 4786.09754, 419.80582, 445.64377, 4…
$ Shape_Area <dbl> 98276.3029, 456191.5206, 467669.1218, 5945.2052, 12612.4960…
test_nwi_pts_all |> as.data.frame() |>  dplyr::group_by(MOD_CLASS) |> dplyr::summarise(count = n())
# A tibble: 3 × 2
  MOD_CLASS count
  <chr>     <int>
1 EMW          49
2 FSW         438
3 UPL        9114

Sometimes there are not enough points for a specific class. In this case this is EMW or emergent wetlands

We can spatSample for more emergent from the NWI data layer.

What’s better (I think…) is to convert the Emergent polygons to points then sub-sample 500 of them.

numEMW <- length(test_nwi_subset[test_nwi_subset$WETLAND_TY == "Freshwater Emergent Wetland"])
suppPoints <- test_nwi_subset |> 
    dplyr::filter(str_detect(WETLAND_TY, "Emergent")) |>
    terra::buffer(-10) |> # a negative buffer should remove points on the lines
    terra::as.points() |> 
    sample(size = c(numEMW, 100)[which.max(c(numEMW, 100))])  |> 
    dplyr::mutate(MOD_CLASS = "EMW",
                  COARSE_CLASS = "WET") |> 
    dplyr::select(MOD_CLASS, COARSE_CLASS) 

test_nwi_pts_all_supp <- rbind(test_nwi_pts_all, suppPoints)

test_nwi_pts_all_supp |> as.data.frame() |>  dplyr::group_by(MOD_CLASS) |> dplyr::summarise(count = n())
# A tibble: 3 × 2
  MOD_CLASS count
  <chr>     <int>
1 EMW         149
2 FSW         438
3 UPL        9114

This is a visualization to show the points and NWI polygons

s <- svc(test_nwi_subset, test_nwi_pts_all_supp)
names(s) <- c("Wetland Polygons", "Wetland Points")
plet(s, c("WETLAND_TY", "MOD_CLASS"))
Warning: SpatVector layer has inconsistent datum (+proj=longlat +datum=NAD83 +no_defs).
Need '+proj=longlat +datum=WGS84'
Warning: SpatVector layer has inconsistent datum (+proj=longlat +datum=NAD83 +no_defs).
Need '+proj=longlat +datum=WGS84'

Write the points data to a file

writeVector(test_huc, "Data/NY_HUCS/Middle_Genegantslet_020501020703.gpkg", overwrite = TRUE)
writeVector(test_nwi_pts_all_supp, "Data/Training_Data/test_nwi_pts_all_supp.gpkg", overwrite = TRUE)